# Extreme weather events do not increase political parties' environmental attention

#'   This script
#'      > cleans press release data into party*day form with
#'      > *N* PRs across all *issues*
#'      > *N* PRs from climate dictionary
#'      > *Party Characteristics*
#'      > *Extreme weather events*


# Tim Wappenhans, António Valentim, Heike Klüver, and Lukas F. Stoetzer
# First: 23-02-2023
# Last: 08-11-2023


 
# PACKAGES ---------------------------------------------------------------------
if (!require("pacman")) install.packages("pacman")
pacman::p_load(
  tidyverse,
  lubridate,
  here,
  stringi
  )


# LOAD DATA --------------------------------------------------------------------
# Event-level press release data.
df.all <- here("data/data_input/partypress.csv") |>
  read_csv()



# CLEANING INTO PARTY*DAY FORMAT -----------------------------------------------

# Keep only parties listed in the daily cabinet file, i.e. parties that have a
# parlgov ID and have been in parliament (valid cabinet_id).
parl_list <- here("data/data_output/cabinet_daily.csv") |>
  read_csv() |>
  pull(parlgov_id) |>
  unique()

df.all <- filter(df.all, parlgov_id %in% parl_list)

# Bring from event-level to party*day 
# 1) count press releases per party, day and issue category
# 2) spread the issue categories into `issue_*` columns, so that each row is
#    one party*day and each column the count of PRs on that issue
# 3) pr_total = row sum over all issue columns; the NAs that pivot_wider()
#    leaves for issues without PRs on that party*day are ignored in the sum
df.daily <- df.all |>
  count(parlgov_id, date, issue_multi, name = "pr_n") |>
  pivot_wider(
    id_cols = c(parlgov_id, date),
    names_from = issue_multi,
    values_from = pr_n,
    names_prefix = "issue_"
  ) |>
  mutate(pr_total = rowSums(across(starts_with("issue_")), na.rm = TRUE))


# Expand to all possible dates
# Panel skeleton: every party crossed with every calendar day between the
# earliest and the latest press release date in df.all. The date range is
# global (the same for all parties), not computed per party.
# expand_grid() returns a plain, ungrouped tibble with the first column varying
# slowest, so rows come out party by party in date order and no per-party
# grouping leaks into df.daily and the joins/mutates that follow.
party_days <- expand_grid(
  parlgov_id = unique(df.all$parlgov_id),
  date = seq.Date(min(df.all$date), max(df.all$date), by = "day")
)

# join data to all possible party*days; party*days without any press release
# get NA in the issue_* and pr_total columns at this point
df.daily <- left_join(party_days, df.daily,
                      by = c("parlgov_id", "date"))


#' Each row now contains:
#'    > all possible *party-days* 
#'    > *N* of PRs across all *issues* 



# MERGE PARTY CHARACTERISTICS --------------------------------------------------
# Government participation
# Daily cabinet information per party; only the columns needed downstream.
df.cabinet <- here("data/data_output/cabinet_daily.csv") |>
  read_csv() |>
  select(cabinet_id, parlgov_id, date, cabinet_party)

# Attach to the party*day panel; party*days without a cabinet record keep NA.
df.daily <- df.daily |>
  left_join(df.cabinet, by = c("parlgov_id", "date"))
glimpse(df.daily)

# join party family and country
# Party family and country per party. `party_id` is renamed to `parlgov_id`
# to match the panel key; country names are lower-cased and
# "united kingdom" is shortened to "uk".
df.family <- here("data/data_input/view_party.csv") |>
  read_csv() |>
  select(parlgov_id = party_id, family_name_short, country_name) |>
  mutate(
    country_name = tolower(country_name),
    country_name = ifelse(country_name == "united kingdom",
                          "uk", country_name)
  ) |>
  distinct()

# sanity check: parties don't change families (expect zero rows)
df.family |>
  count(parlgov_id) |>
  filter(n > 1)

# merge to party*days
df.daily <- df.daily |>
  left_join(df.family, by = "parlgov_id")


# EXTREME WEATHER EVENTS -------------------------------------------------------
# load data
df.disasters_deadly <- here("data/data_output/disasters_daily.csv") |>
  read_csv() |>
  select(country_name, date, disaster_id, type, deaths)

# merge to party*days and derive the treatment indicators
df.daily <- df.daily |>
  left_join(df.disasters_deadly, by = c("country_name", "date")) |>
  mutate(
    # 1 on any country-day with a disaster record, 0 otherwise
    treat = as.numeric(!is.na(disaster_id)),
    # type-specific indicators: 1 only if treated and of the given type
    # (NA when a disaster record has no type)
    treat_fire = as.numeric(treat == 1 & type == "Wildfire"),
    treat_flood = as.numeric(treat == 1 & type == "Flood"),
    treat_storm = as.numeric(treat == 1 & type == "Storm"),
    treat_temp = as.numeric(treat == 1 & type == "Extreme temperature")
  )

# add indicator for transboundary events
# Last press release date per country before 2021, computed on the full
# (unfiltered) press release file.
df.scraping_bounds <- read_csv(here("data/data_input/partypress.csv")) |>
  filter(date < as.Date("2021-01-01")) |>
  group_by(country_name) |>
  summarize(max_date = max(date))

# One row per disaster*country*type, restricted to disaster rows that start
# no later than the country's last press release date. Countries without a
# scraping bound (max_date is NA) drop out in the filter.
df.disasters <- read_csv(here("data/data_output/disasters_daily.csv")) |>
  left_join(df.scraping_bounds, by = "country_name") |>
  filter(start_date <= max_date) |>
  group_by(disaster_id, country_name, type) |>
  summarize(
    start_date = as.character(min(start_date)),
    end_date = as.character(max(end_date)),
    total_deaths = as.character(sum(deaths, na.rm = TRUE)),
    # no na.rm here: a missing damage figure makes the total missing
    total_damages = as.character(sum(damages)),
    .groups = "drop"
  ) |>
  # spell missing totals out as the string "NA"
  mutate(total_damages = ifelse(is.na(total_damages), "NA", total_damages)) |>
  select(!disaster_id)

# Transboundary events
# Start dates shared by more than one disaster row are taken to mark
# transboundary events.
transboundary <- df.disasters |>
  count(start_date) |>
  filter(n > 1) |>
  pull(start_date)

df.trans <- df.disasters |>
  filter(start_date %in% transboundary) |>
  arrange(start_date) |>
  select(country_name, type, start_date, end_date)

# Expand every event to one row per country-day it covers.
# The expansion is done row by row, so several events sharing the same country
# and start date cannot hand seq.Date() vectors of length > 1.
# distinct() then keeps each country-day once: overlapping events would
# otherwise duplicate party*days in the join below.
df.trans <- df.trans |>
  select(country_name, start_date, end_date) |>
  rowwise() |>
  mutate(date = list(seq.Date(as.Date(start_date), as.Date(end_date),
                              by = "day"))) |>
  ungroup() |>
  unnest(cols = c(date)) |>
  distinct(country_name, date) |>
  mutate(treat_trans = 1)

# join
df.daily <- left_join(df.daily, df.trans,
                      by = c("country_name", "date"))

df.daily <- df.daily |>
  mutate(
    # country-days without a transboundary event are untreated
    treat_trans = ifelse(is.na(treat_trans), 0, treat_trans),
    # guard: keep treat_trans missing wherever treat itself is missing
    treat_trans = ifelse(is.na(treat), NA, treat_trans)
  )


# HANDLING NAS -----------------------------------------------------------------
## 1) true NA if not in parliament ATM (cabinet_id missing)
# Press release counts (issue_* and pr_total) that are NA although the party
# has a cabinet_id on that day are genuine zeros; NAs on days without a
# cabinet_id stay NA.
# across() replaces the deprecated mutate_at() + funs() combination.
df.daily <- df.daily |>
  mutate(
    across(
      # all variables that should be 0 not NA
      c(starts_with("issue_"), pr_total),
      # replace with 0 as long as there's cabinet id
      \(x) ifelse(is.na(x) & !is.na(cabinet_id), 0, x)
    )
  )

# 2) true NA if we didn't scrape
# extract scraping bounds
# Last press release date per country among the parties kept in df.all.
# NOTE(review): unlike the bounds used for the transboundary indicator, these
# are not restricted to dates before 2021 -- confirm this is intended.
df.scraping_bounds <- df.all |>
  group_by(country_name) |>
  summarize(max_date = max(date))

# merge back to party*days
# NOTE(review): assumes df.all$country_name is spelled like the country names
# already in df.daily (lower case, "uk") -- otherwise max_date is NA.
df.daily <- left_join(df.daily, df.scraping_bounds,
                      by = c("country_name"))

# across() replaces the deprecated mutate_at() + funs() combination.
df.daily <- df.daily |>
  mutate(
    across(
      # all variables that should be NA not 0
      c(starts_with("issue_"), pr_total),
      # replace all Vars with NA that lie outside of scraping bound
      \(x) ifelse(date > max_date, NA, x)
    )
  ) |>
  select(!max_date)


# CONSECUTIVE TIME COUNTER -----------------------------------------------------
# Running day index: t = 1 for the earliest date in the panel, increasing by
# one for each further distinct date (in ascending date order).
t_days <- df.daily |>
  ungroup() |>
  distinct(date) |>
  arrange(date) |>
  mutate(t = row_number())

# join t counter to data
df.daily <- df.daily |>
  left_join(t_days, by = "date")

# check if party-days are unique (expect zero rows)
df.daily |>
  group_by(parlgov_id, t) |>
  summarize(n = n(), .groups = "keep") |>
  filter(n > 1)


# ADD CLIMATE DICTIONARY -------------------------------------------------------
df.dic_climate <- here("data/data_output/daily_dictionary_climate.csv") |>
  read_csv()

df.daily <- df.daily |>
  left_join(df.dic_climate, by = c("parlgov_id", "date"))

# handle NAs: a missing dictionary count becomes 0 whenever pr_total is
# observed; where pr_total itself is NA the count stays NA
df.daily <- df.daily |>
  mutate(
    across(
      c(climate_words, climate_prs),
      \(x) ifelse(is.na(x) & !is.na(pr_total), 0, x)
    )
  )


# ADD RELIEF DICTIONARY --------------------------------------------------------
df.dic_relief <- here("data/data_output/daily_dictionary_relief.csv") |>
  read_csv()

df.daily <- df.daily |>
  left_join(df.dic_relief, by = c("parlgov_id", "date"))

# handle NAs: a missing dictionary count becomes 0 whenever pr_total is
# observed; where pr_total itself is NA the count stays NA
df.daily <- df.daily |>
  mutate(
    across(
      c(relief_words, relief_prs),
      \(x) ifelse(is.na(x) & !is.na(pr_total), 0, x)
    )
  )


# EXPORT -----------------------------------------------------------------------
# Write the finished party*day panel to disk.
df.daily |>
  write_csv(file = here("data/data_output/press_daily.csv"))